---
title: "In the Hands of a Few: Disaster Recovery Committees in Japan"
output: html_notebook
---

This RMarkdown Document replicates our analysis.

# 0. Load Packages

```{r, message = FALSE, warning = FALSE}
library(tidyverse)
library(tidygraph)
library(ggraph)
library(network)
library(statnet)
library(viridis)
library(GGally)
library(intergraph)
library(readxl)
recode <- dplyr::recode
recode_factor <- dplyr::recode_factor
select <- dplyr::select
```

## 0.1 Data Preparation

```{r, message = FALSE, warning = FALSE, eval = FALSE}
# First, let's import the raw data and save it as .csv files that we can work with
# and that are anonymized. These original data files will not be shared with others,
# so as to preserve the privacy of respondents, but this code shows how we anonymize it.

# First, grab committee names
read_excel("rawdata/committees.xlsx") %>%
  select(committee_id, geography = geo, town, level = 2, 
         committee_japanese, committee_romaji) %>%
  mutate(town = town %>% recode("Kesennuma-shi" = "Kisennuma-shi",
                                "Cabinet" = "National",
                                "Ministry" = "National",
                                "Other" = "National"),
         level = level %>% recode("miscellaneous" = "national"),
         geography = tolower(geography) %>% recode("other" = "national")) %>%
  write_csv("data/committees.csv")

# Second, grab member names. We're going to make this a separate file, 
# so that this is the only way to access the names of members.
read_excel("rawdata/members.xlsx") %>% 
  select(member_id = name_id) %>%
  # For some reason, we have an extra name in there,
  # but they are not listed in any committee, so we're going to take that one out of the final node list.
  filter(member_id != "name_45") %>%
  # Andrew fixed some of their names, so let's join in the correct names
  left_join(y = read_excel("rawdata/student_coding_dataset.xlsx") %>%
              select(member_id, name_first, name_last, name, name_japanese, notes, sources),
            by = "member_id") %>%
  write_csv("data/member_names.csv")

# Third, grab member traits.
read_excel("rawdata/student_coding_dataset.xlsx") %>%
  select(member_id, role, birth_year, age = age_, age_group, gender, 
         social_org = type_social_nonprofit, influential_citizen = type_influential_citizen,
         politician, politician_level, politician_role, politician_party, 
         govt = type_govt, 
         govt_natl = govt_level_national, govt_pref = govt_level_prefectural,
         govt_muni = govt_level_municipal, research = type_education,
         business = type_business, 
         fisheries = type_fisheries, agriculture = type_agriculture,
         emergency_services = type_emergency_services,
          health_care = type_health_care, 
         lawyer = type_lawyer, religious = type_religious,
         media = type_media,outside_FMI_before_3_11) %>%
  mutate(role = tolower(role),
         birth_year = as.numeric(birth_year)) %>%
  mutate(community_participation = if_else(
    social_org == "yes" | influential_citizen == "yes", 
    true = "yes", false = "no", missing = NA_character_)) %>%
  mutate(community_participation_relig = if_else(
    social_org == "yes" | influential_citizen == "yes" | religious == "yes",
    true = "yes", false = "no", missing = NA_character_)) %>%
  write_csv("data/members.csv")


# Fourth, grab edges
read_excel("rawdata/edgelist.xlsx") %>% 
  select(committee_id, member_id) %>%
  group_by(committee_id, member_id) %>%
  summarize(weight = n()) %>%
  ungroup() %>%
  arrange(committee_id, member_id) %>%
  filter(!is.na(committee_id), !is.na(member_id)) %>%
  write_csv("data/edgelist.csv")

read_csv("data/committees.csv")
```



# 1. Build Network

```{r}
# Fill in member traits that are missing with the mode
read_csv("data/members.csv") %>%
  mutate_at(vars(social_org, influential_citizen, religious,
                 community_participation, community_participation_relig,
                 gender, govt, politician, research,
                 business, agriculture, fisheries, health_care, media, emergency_services),
            # If this category is missing
            funs(if_else(is.na(.), 
                         # Fill it in the with mode
                         true = names(table(.) %>% 
                           sort(decreasing = TRUE))[1],
                         # Otherwise, leave the same
                         false = .))) %>%
  write_csv("data/members_imputed.csv")
```


```{r, message= FALSE, warning = FALSE}
# Now, using the member traits and committees traits,
# let's create the full incidence matrix of member-committee ties, and fill it in with traits.
expand_grid(member_id = read_csv("data/members.csv")$member_id,
            committee_id = read_csv("data/committees.csv")$committee_id) %>%
  # Join in edges and weights
  left_join(by = c("member_id", "committee_id"), y = read_csv("data/edgelist.csv")) %>%
  mutate(weight = if_else(is.na(weight), 0, as.numeric(weight))) %>%
  # Join in member traits
  left_join(by = "member_id", y = read_csv("data/members.csv")) %>%
  # Join in committee traits
  left_join(by = "committee_id", y = read_csv("data/committees.csv")) %>%
  # Save as a .rds, for file size conservation and speed
  saveRDS("data/matrix.rds")


# Make a tidygraph object of the network
tbl_graph(
  nodes = bind_rows(read_csv("data/members.csv") %>% 
                      rename(id = member_id), 
                    read_csv("data/committees.csv") %>%
                      rename(id = committee_id)), 
  edges = read_csv("data/edgelist.csv") %>% 
    select(from = committee_id, to = member_id, weight), directed = FALSE, node_key = "id") %>%
  saveRDS("data/network.rds")
```


## 1.2. Create Matrices and Transformations

```{r, message = FALSE, warning = FALSE}
# Generate Incidence Matrix
expand_grid(member_id = read_csv("data/members.csv")$member_id,
            committee_id = read_csv("data/committees.csv")$committee_id) %>%
  # Join in edges and weights
  left_join(by = c("member_id", "committee_id"), y = read_csv("data/edgelist.csv")) %>%
  mutate(weight = if_else(is.na(weight), 0, as.numeric(weight))) %>%
  # pivot into a wide incidence matrix
  pivot_wider(
    id_cols = member_id,
    names_from = committee_id,
    values_from = weight) %>%
  tibble::column_to_rownames(var = "member_id") %>%
  as.matrix() %>%
  saveRDS("data/incidence_matrix.rds")


# Generate member coaffiliation network edgelist
read_rds("data/incidence_matrix.rds") %*% t(read_rds("data/incidence_matrix.rds")) %>%
  # remove the diagonal
  diag.remove(remove.val = 0) %>%
  # Convert to tidy dataframe with rownames
  as.data.frame() %>%
  tibble::rownames_to_column(var = "from") %>%
  # Pivot into a full edgelist
  pivot_longer(
    cols = -from,
    names_to = "to",
    values_to = "weight") %>%
  filter(weight > 0 ) %>%
  saveRDS("data/edgelist_members.rds")

# Generate committee coaffiliation network edgelist
t(read_rds("data/incidence_matrix.rds")) %*% read_rds("data/incidence_matrix.rds") %>%
  # remove the diagonal
  diag.remove(remove.val = 0) %>%
  # Convert to tidy dataframe with rownames
  as.data.frame() %>%
  tibble::rownames_to_column(var = "from") %>%
  # Pivot into a full edgelist
  pivot_longer(
    cols = -from,
    names_to = "to",
    values_to = "weight") %>%
  filter(weight > 0) %>%
  saveRDS("data/edgelist_committees.rds")
```

# 2. Create Network Measures Tables

## 2.1 Block Permutations

```{r, eval = FALSE}
# Let's build a significance test for taking the difference of proportions between Iwate and Miyagi

# In what way shall I block permute this?
# I could block permute this so that...
# The same number of memberships are preserved within each level of government (municipal, prefectural, national) and each prefecture (Iwate, Miyagi, national), meaning 5 blocks total

dir.create("perm")

replicate(n = 1000, 
          expr = read_rds("data/matrix.rds") %>%
            select(member_id, committee_id, weight, geography, level) %>%
            # This will permute the committee assignment of each member,
            # Making sure that a member can only be assigned to a level 
            group_by(geography, level) %>%
            mutate(weight = sample(weight, replace = FALSE)) %>%
            ungroup() %>%
            # Now filter out non-member pairs
            filter(weight > 0) %>%
            # Get rid of extraneous data
            select(member_id, committee_id, weight),
          simplify = FALSE) %>%
  bind_rows(.id = "replicate") %>%
  saveRDS("perm/block_perm.rds")
```

## 2.2 Prefectural Percentages

```{r, message = FALSE, warning = FALSE}
# Now, using these block permutations, we can run ANY function we want on a single set,
# comparing statistics from random committee assignments while knowing that
# we aren't considering any assignments that aren't really historically possible

# Give me the proportion of committee memberships per prefecture
# that are of member type XXX.

# Write a function to get share of government, research, social_org, etc.
# affiliated committee members per prefecture
get_pref = function(data){
  # Print number of replications we have passed
  print(paste(data$replicate[1]))
  
  temp <- data %>%
    # Identify memberships
    filter(weight > 0) %>%
    select(member_id, committee_id) %>%
    # Join in committee traits
    left_join(by = "committee_id", y = read_csv("data/committees.csv")) %>%
    # Join in member traits
    left_join(by = "member_id", y = read_csv("data/members.csv"))
  
  bind_rows(
    # Calculate share of influential citizens/social orgs
    temp %>%
      group_by(geography, community_participation) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(community_participation == "yes") %>%
      select(-community_participation) %>%
      ungroup() %>%
      mutate(type = "community_participation"),
    
       # Calculate share of influential citizens/social orgs/religious orgs
    temp %>%
      group_by(geography, community_participation_relig) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(community_participation_relig == "yes") %>%
      select(-community_participation_relig) %>%
      ungroup() %>%
      mutate(type = "community_participation_relig"),
    
    # Calculate share of government officials
    temp %>%
      group_by(geography, govt) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(govt == "yes") %>%
      select(-govt) %>%
      ungroup() %>%
      mutate(type = "govt"),
    
    # Calculate share of researchers
    temp %>%
      group_by(geography, research) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(research == "yes") %>%
      select(-research) %>%
      ungroup() %>%
      mutate(type = "research"),
    
    # Calculate share of elected officials
    temp %>%
      group_by(geography, politician) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(politician == "yes") %>%
      select(-politician) %>%
      ungroup() %>%
      mutate(type = "politician"),
    
    # Calculate share of social orgs
    temp %>%
      group_by(geography, social_org) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(social_org == "yes") %>%
      select(-social_org) %>%
      ungroup() %>%
      mutate(type = "social_org"),
    # Calculate share of influential citizens
    temp %>%
      group_by(geography, influential_citizen) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(influential_citizen == "yes") %>%
      select(-influential_citizen) %>%
      ungroup() %>%
      mutate(type = "influential_citizen"),
    
    
    # Calculate share of women
    temp %>%
      group_by(geography, gender) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(gender == "woman") %>%
      select(-gender) %>%
      ungroup() %>%
      mutate(type = "gender"),
    
         # Calculate share of business
    temp %>%
      group_by(geography, business) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(business == "yes") %>%
      select(-business) %>%
      ungroup() %>%
      mutate(type = "business"),
    
    # Calculate share of fisheries
    temp %>%
      group_by(geography, fisheries) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(fisheries == "yes") %>%
      select(-fisheries) %>%
      ungroup() %>%
      mutate(type = "fisheries"),
    
    # Calculate share of agriculture
    temp %>%
      group_by(geography, agriculture) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(agriculture == "yes") %>%
      select(-agriculture) %>%
      ungroup() %>%
      mutate(type = "agriculture"),
    
    
    # Calculate share of religious
    temp %>%
      group_by(geography, religious) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(religious == "yes") %>%
      select(-religious) %>%
      ungroup() %>%
      mutate(type = "religious"),
    
    # Calculate share of religious
    temp %>%
      group_by(geography, media) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(media == "yes") %>%
      select(-media) %>%
      ungroup() %>%
      mutate(type = "media"),
    
    # Calculate share of health_care
    temp %>%
      group_by(geography, health_care) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(health_care == "yes") %>%
      select(-health_care) %>%
      ungroup() %>%
      mutate(type = "health_care"),
    
    # Calculate share of health_care
    temp %>%
      group_by(geography, emergency_services) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(emergency_services == "yes") %>%
      select(-emergency_services) %>%
      ungroup() %>%
      mutate(type = "emergency_services"),
    
    
    # Calculate share of health_care
    temp %>%
      group_by(geography, outside_FMI_before_3_11) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(outside_FMI_before_3_11 == "yes") %>%
      select(-outside_FMI_before_3_11) %>%
      ungroup() %>%
      mutate(type = "outside_FMI_before_3_11"),
    
    # Calculate share of health_care
    temp %>%
      group_by(geography, lawyer) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(lawyer == "yes") %>%
      select(-lawyer) %>%
      ungroup() %>%
      mutate(type = "lawyer")) %>%
    
    return()
}

# Extract results across all block permutations
system.time(
  read_rds("perm/block_perm.rds") %>%
    split(.$replicate) %>%
    map_dfr(~get_pref(.), .id = "replicate") %>%
    ungroup() %>%
    saveRDS("perm/null_pref.rds")
)
```

## 2.3 Committee Percentages

```{r, message = FALSE, warning = FALSE}
# Write a function to extract the same data, but for each committee
get_committee = function(data){
  # Print number of replications we have passed
  print(paste(data$replicate[1]))
  
  temp <- data %>%
    # Identify memberships
    filter(weight > 0) %>%
    select(member_id, committee_id) %>%
    # Join in member traits
    left_join(by = "member_id", y = read_csv("data/members.csv"))
  
  bind_rows(
    
       # Calculate share of influential citizens/social orgs
    temp %>%
      group_by(committee_id, community_participation) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(community_participation == "yes") %>%
      select(-community_participation) %>%
      ungroup() %>%
      mutate(type = "community_participation"),
    
       # Calculate share of influential citizens/social orgs/religious orgs
    temp %>%
      group_by(committee_id, community_participation_relig) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(community_participation_relig == "yes") %>%
      select(-community_participation_relig) %>%
      ungroup() %>%
      mutate(type = "community_participation_relig"),
    
    # Calculate share of government officials
    temp %>%
      group_by(committee_id, govt) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(govt == "yes") %>%
      select(-govt) %>%
      ungroup() %>%
      mutate(type = "govt"),
    
    # Calculate share of researchers
    temp %>%
      group_by(committee_id, research) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(research == "yes") %>%
      select(-research) %>%
      ungroup() %>%
      mutate(type = "research"),
    
    # Calculate share of elected officials
    temp %>%
      group_by(committee_id, politician) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(politician == "yes") %>%
      select(-politician) %>%
      ungroup() %>%
      mutate(type = "politician"),
    
    # Calculate share of social orgs
    temp %>%
      group_by(committee_id, social_org) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(social_org == "yes") %>%
      select(-social_org) %>%
      ungroup() %>%
      mutate(type = "social_org"),
    # Calculate share of influential citizens
    temp %>%
      group_by(committee_id, influential_citizen) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(influential_citizen == "yes") %>%
      select(-influential_citizen) %>%
      ungroup() %>%
      mutate(type = "influential_citizen"),
    
    
    # Calculate share of women
    temp %>%
      group_by(committee_id, gender) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(gender == "woman") %>%
      select(-gender) %>%
      ungroup() %>%
      mutate(type = "gender"),
    
    
        # Calculate share of business
    temp %>%
      group_by(committee_id, business) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(business == "yes") %>%
      select(-business) %>%
      ungroup() %>%
      mutate(type = "business"),
    
    # Calculate share of fisheries
    temp %>%
      group_by(committee_id, fisheries) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(fisheries == "yes") %>%
      select(-fisheries) %>%
      ungroup() %>%
      mutate(type = "fisheries"),
    
    # Calculate share of agriculture
    temp %>%
      group_by(committee_id, agriculture) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(agriculture == "yes") %>%
      select(-agriculture) %>%
      ungroup() %>%
      mutate(type = "agriculture"),
    
    
    # Calculate share of religious
    temp %>%
      group_by(committee_id, religious) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(religious == "yes") %>%
      select(-religious) %>%
      ungroup() %>%
      mutate(type = "religious"),
    
    # Calculate share of religious
    temp %>%
      group_by(committee_id, media) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(media == "yes") %>%
      select(-media) %>%
      ungroup() %>%
      mutate(type = "media"),
    
    # Calculate share of health_care
    temp %>%
      group_by(committee_id, health_care) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(health_care == "yes") %>%
      select(-health_care) %>%
      ungroup() %>%
      mutate(type = "health_care"),
    
    # Calculate share of health_care
    temp %>%
      group_by(committee_id, emergency_services) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(emergency_services == "yes") %>%
      select(-emergency_services) %>%
      ungroup() %>%
      mutate(type = "emergency_services"),
    
    
    # Calculate share of health_care
    temp %>%
      group_by(committee_id, outside_FMI_before_3_11) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(outside_FMI_before_3_11 == "yes") %>%
      select(-outside_FMI_before_3_11) %>%
      ungroup() %>%
      mutate(type = "outside_FMI_before_3_11"),
    
    # Calculate share of health_care
    temp %>%
      group_by(committee_id, lawyer) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(lawyer == "yes") %>%
      select(-lawyer) %>%
      ungroup() %>%
      mutate(type = "lawyer")) %>%
    
    return()
}

# Extract results across all block permutations
system.time(
  read_rds("perm/block_perm.rds") %>%
    split(.$replicate) %>%
    map_dfr(~get_committee(.), .id = "replicate") %>%
    ungroup() %>%
    saveRDS("perm/null_committee.rds")
)
```

## 2.4 Town Percentages

```{r, message = FALSE, warning = FALSE, eval = FALSE}
# Write a function to extract the same data, but for each committee
get_town = function(data){
  # Print number of replications we have passed
  print(paste(data$replicate[1]))
  
  temp <- data %>%
    # Identify memberships
    filter(weight > 0) %>%
    select(member_id, committee_id) %>%
    left_join(by = "committee_id", y = read_csv("data/committees.csv") %>%
                select(committee_id, town)) %>%
    # Join in member traits
    left_join(by = "member_id", y = read_csv("data/members.csv"))
  
  bind_rows(
         # Calculate share of influential citizens/social orgs
    temp %>%
      group_by(town, community_participation) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(community_participation == "yes") %>%
      select(-community_participation) %>%
      ungroup() %>%
      mutate(type = "community_participation"),
    
       # Calculate share of influential citizens/social orgs/religious orgs
    temp %>%
      group_by(town, community_participation_relig) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(community_participation_relig == "yes") %>%
      select(-community_participation_relig) %>%
      ungroup() %>%
      mutate(type = "community_participation_relig"),
    
    # Calculate share of government officials
    temp %>%
      group_by(town, govt) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(govt == "yes") %>%
      select(-govt) %>%
      ungroup() %>%
      mutate(type = "govt"),
    
    # Calculate share of researchers
    temp %>%
      group_by(town, research) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(research == "yes") %>%
      select(-research) %>%
      ungroup() %>%
      mutate(type = "research"),
    
    # Calculate share of elected officials
    temp %>%
      group_by(town, politician) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(politician == "yes") %>%
      select(-politician) %>%
      ungroup() %>%
      mutate(type = "politician"),
    
    # Calculate share of social orgs
    temp %>%
      group_by(town, social_org) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(social_org == "yes") %>%
      select(-social_org) %>%
      ungroup() %>%
      mutate(type = "social_org"),
    # Calculate share of influential citizens
    temp %>%
      group_by(town, influential_citizen) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(influential_citizen == "yes") %>%
      select(-influential_citizen) %>%
      ungroup() %>%
      mutate(type = "influential_citizen"),
    
    
    # Calculate share of women
    temp %>%
      group_by(town, gender) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(gender == "woman") %>%
      select(-gender) %>%
      ungroup() %>%
      mutate(type = "gender"),

        # Calculate share of business
    temp %>%
      group_by(town, business) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(business == "yes") %>%
      select(-business) %>%
      ungroup() %>%
      mutate(type = "business"),
        
    # Calculate share of fisheries
    temp %>%
      group_by(town, fisheries) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(fisheries == "yes") %>%
      select(-fisheries) %>%
      ungroup() %>%
      mutate(type = "fisheries"),
    
    # Calculate share of agriculture
    temp %>%
      group_by(town, agriculture) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(agriculture == "yes") %>%
      select(-agriculture) %>%
      ungroup() %>%
      mutate(type = "agriculture"),
    
    
    # Calculate share of religious
    temp %>%
      group_by(town, religious) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(religious == "yes") %>%
      select(-religious) %>%
      ungroup() %>%
      mutate(type = "religious"),
    
    # Calculate share of religious
    temp %>%
      group_by(town, media) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(media == "yes") %>%
      select(-media) %>%
      ungroup() %>%
      mutate(type = "media"),
    
    # Calculate share of health_care
    temp %>%
      group_by(town, health_care) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(health_care == "yes") %>%
      select(-health_care) %>%
      ungroup() %>%
      mutate(type = "health_care"),
    
    # Calculate share of health_care
    temp %>%
      group_by(town, emergency_services) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(emergency_services == "yes") %>%
      select(-emergency_services) %>%
      ungroup() %>%
      mutate(type = "emergency_services"),
    
    
    # Calculate share of health_care
    temp %>%
      group_by(town, outside_FMI_before_3_11) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(outside_FMI_before_3_11 == "yes") %>%
      select(-outside_FMI_before_3_11) %>%
      ungroup() %>%
      mutate(type = "outside_FMI_before_3_11"),
    
    # Calculate share of health_care
    temp %>%
      group_by(town, lawyer) %>%
      summarize(share = n()) %>%
      mutate(share = share / sum(share)) %>%
      filter(lawyer == "yes") %>%
      select(-lawyer) %>%
      ungroup() %>%
      mutate(type = "lawyer")) %>%
    
    return()
}

# Extract results across all block permutations
system.time(
  read_rds("perm/block_perm.rds") %>%
    split(.$replicate) %>%
    map_dfr(~get_town(.), .id = "replicate") %>%
    ungroup() %>%
    saveRDS("perm/null_town.rds")
)

```

## 2.5 Diversity

```{r, message = FALSE, warning = FALSE}
get_diversity = function(data){
  # Get number of samples processed so far
  print(paste(data$replicate[1]))
  
  temp <- data %>%
    # Identify memberships
    filter(weight > 0) %>%
    select(member_id, committee_id) %>%
    left_join(by = "committee_id", y = read_csv("data/committees.csv") %>%
                select(committee_id, town, geography)) %>%
    # Join in member traits
    left_join(by = "member_id", y = read_csv("data/members.csv"))
  
  bind_rows(
    # Measure diversity of representation
    # For each committee, how many different groups were present?
    temp %>%
      select(member_id, committee_id, 
             govt, politician, business, agriculture, fisheries, 
             research, social_org, influential_citizen,
             health_care, emergency_services, media, religious) %>%
      pivot_longer(
        cols = -c(member_id, committee_id),
        names_to = "type",
        values_to = "value") %>%
      # Filter to just groups represented
      filter(value == "yes") %>%
      # Now grab just the committee ids and group types for those individual identities
      select(committee_id, type) %>%
      distinct() %>%
      # Tally how many distinct types were represented
      group_by(unit = committee_id) %>%
      summarize(share = n()) %>%
      mutate(type = "diversity",
             level = "committee"),
    
    # For each town, how many different groups were present?
    temp %>%
      select(member_id, town, 
             govt, politician, business, agriculture, fisheries, 
             research, social_org, influential_citizen,
             health_care, emergency_services, media, religious) %>%
      pivot_longer(
        cols = -c(member_id, town),
        names_to = "type",
        values_to = "value") %>%
      # Filter to just groups represented
      filter(value == "yes") %>%
      # Now grab just the town names and group types for those individual identities
      select(town, type) %>%
      distinct() %>%
      # Tally how many distinct types were represented
      group_by(unit = town) %>%
      summarize(share = n()) %>%
      mutate(type = "diversity",
             level = "town"),
    
    # For each prefecture, how many different groups were present?
    temp %>%
      select(member_id, geography, 
             govt, politician, business, agriculture, fisheries, 
             research, social_org, influential_citizen,
             health_care, emergency_services, media, religious) %>%
      pivot_longer(
        cols = -c(member_id, geography),
        names_to = "type",
        values_to = "value") %>%
      # Filter to just groups represented
      filter(value == "yes") %>%
      # Now grab just the town names and group types for those individual identities
      select(geography, type) %>%
      distinct() %>%
      # Tally how many distinct types were represented
      group_by(unit = geography) %>%
      summarize(share = n()) %>%
      mutate(type = "diversity",
             level = "prefecture")
  ) %>%
    return()

}

# Extract results across all block permutations
system.time(
  read_rds("perm/block_perm.rds") %>%
    split(.$replicate) %>%
    map_dfr(~get_diversity(.), .id = "replicate") %>%
    ungroup() %>%
    saveRDS("perm/null_diversity.rds")
)
```


## 2.6 Member Traits x Network Connectivity

```{r, message = FALSE, warning = FALSE}
# Average degree
# Calculate degree

# Average number of different committees a person is connected to, 
# based on their demographics

get_meandegree = function(data){
  
  print(paste(data$replicate[1]))
  
  temp <- data %>%
    # Identify memberships
    filter(weight > 0) %>%
    select(member_id, committee_id) %>%
    left_join(by = "committee_id", y = read_csv("data/committees.csv") %>%
                select(committee_id, town)) %>%
    # Join in member traits
    left_join(by = "member_id", y = read_csv("data/members.csv"))
  
  bind_rows(
    
    
    
    temp %>%
      group_by(member_id, community_participation) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = community_participation) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "community_participation"),
    
        
    temp %>%
      group_by(member_id, community_participation_relig) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = community_participation_relig) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "community_participation_relig"),
    
    
    temp %>%
      group_by(member_id, research) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = research) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "research"),
    
    temp %>%
      group_by(member_id, gender) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = gender) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "gender"),
    
    
    temp %>%
      group_by(member_id, govt) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = govt) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "govt"),
    
    
    temp %>%
      group_by(member_id, politician) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = politician) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "politician"),
    
    temp %>%
      group_by(member_id, business) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = business) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "business"),
    
    
    temp %>%
      group_by(member_id, agriculture) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = agriculture) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "agriculture"),
    
    
    temp %>%
      group_by(member_id, fisheries) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = fisheries) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "fisheries"),
    
    
    
    temp %>%
      group_by(member_id, social_org) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = social_org) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "social_org"),
    
    
    temp %>%
      group_by(member_id, influential_citizen) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = influential_citizen) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "influential_citizen"),
    
    
    temp %>%
      group_by(member_id, religious) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = religious) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "religious"),
    
    temp %>%
      group_by(member_id, health_care) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = health_care) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "health_care"),
    
    
    temp %>%
      group_by(member_id, emergency_services) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = emergency_services) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "emergency_services"),
    
    
    temp %>%
      group_by(member_id, media) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = media) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "media")
  ) %>%
    return()
}


# Extract results across all block permutations
system.time(
  read_rds("perm/block_perm.rds") %>%
    split(.$replicate) %>%
    map_dfr(~get_meandegree(.), .id = "replicate") %>%
    ungroup() %>%
    saveRDS("perm/null_meandegree.rds")
)
```


# 3. Build Committee-level Dataset

```{r}
read_rds("data/matrix.rds") %>%
  get_pref() %>%
    pivot_wider(
    id_cols = geography,
    names_from = type,
    values_from = share, 
    values_fill = list(share = 0)) %>%
  mutate_at(vars(-geography), funs(round(.,2)))
```

# 4. Model Community Participation

```{r, message = FALSE, warning = FALSE}
# install.packages("remotes") # Install remotes so we can install newest version of texreg
# library(remotes) # load
# install_github("leifeld/texreg") # install newest version of texreg
library(texreg)

# Import committee dataset
dat <- read_csv("data/committees.csv") %>%
  # create a municipal level indicator
  mutate(municipal = if_else(level == "municipal", "municipal", "nonmunicipal") %>% 
           factor() %>% relevel(ref = "nonmunicipal")) %>%
  # Join in for each committee the share of members who are XXX type
  left_join(by = "committee_id", 
            y = read_rds("data/matrix.rds") %>%
              get_committee() %>%
              pivot_wider(
                id_cols = committee_id,
                names_from = type,
                values_from = share, 
                values_fill = list(share = 0)) %>%
              mutate_at(vars(-committee_id), funs(round(.,2)))) %>%
  # Join in number of different types of orgs per committee
  left_join(by = c("committee_id" = "unit"),
            y = read_rds("data/matrix.rds") %>%
              get_diversity() %>%
              filter(level == "committee") %>%
              pivot_wider(
                id_cols = unit,
                names_from = type,
                values_from = share))
```

## 4.1 Visualize Membership

```{r}
dat %>%
  select(committee_id, geography, level, community_participation_relig, gender,
         business, agriculture, fisheries, govt, politician, research) %>%
  pivot_longer(
    cols = -c(committee_id, geography, level),
    names_to = "type",
    values_to = "value") %>%
  mutate(type = type %>% recode_factor(
    "community_participation_relig" = "Citizens, NGOs,\n& Religious Orgs",
    "gender" = "Women",
    "business" = "Businesses",
    "agriculture" = "Agriculture",
    "fisheries" = "Fisheries",
    "govt" = "Government\nBureaucrats",
    "politician" = "Elected Officials",
    "research" = "Engineers\n& Researchers")) %>%
  ggplot(mapping = aes(x = reorder(type, desc(type)), y = value, fill = type)) +
  geom_jitter(height = 0, width = 0.05, color = "darkgrey") +
  geom_violin(alpha = 0.5) +
  geom_point(data = . %>% group_by(type) %>% 
               summarize(mean = mean(value, na.rm = TRUE)),
             mapping = aes(x = type, y = mean), color = "black", size = 3) +
  geom_point(data = . %>% group_by(type) %>% 
               summarize(mean = mean(value, na.rm = TRUE)),
             mapping = aes(x = type, y = mean), color = "white", size = 2) +
  theme_classic() +
  theme(panel.border = element_rect(color = "black", fill = NA),
        plot.caption = element_text(hjust = 0.5)) +
  scale_fill_viridis(discrete = TRUE, option = "plasma", begin = 0.1, end = 0.8) +
  coord_flip() +
  labs(y = "(%) Members per Committee", x = "",
       caption = "Points displays % of members in each committee representing each interest group,\njittered by 0.05 for visual clarity.Violin plots highlight distribution of values,\nwith a single white point per distribution indicating the mean.") +
  guides(fill = FALSE)
  

```

## 4.2 Prefectural Comparisons

```{r}

pref <- read_rds("data/matrix.rds") %>%
  get_pref() %>%
  filter(type %in% c("community_participation_relig", "gender",
                     "business", "agriculture", "fisheries",
                     "govt", "politician", "research")) %>%
  mutate(type = type %>% recode_factor(
    "community_participation_relig" = "Citizens, NGOs,\n& Religious Orgs",
    "gender" = "Women",
       "research" = "Engineers\n& Researchers",
    "govt" = "Government\nBureaucrats",
    "politician" = "Elected Officials",
    "business" = "Businesses",
    "agriculture" = "Agriculture",
    "fisheries" = "Fisheries")) %>%
    mutate(geography = paste(str_sub(geography, 0, 1) %>% toupper(),
                             str_sub(geography, 2,-1), sep = ""))

pref %>%
  ggplot(mapping = aes(x = geography, y = share * 100, fill = geography)) +
  geom_col() +
  coord_flip() +
  theme_classic() +
  theme(panel.grid.major = element_line(color = "grey", linetype = "dotted"),
    panel.spacing = unit(0.3, "cm"),
    panel.border = element_rect(color = "black", fill = NA)) +
  facet_wrap(~type, ncol = 4) +
  guides(fill = FALSE) +
  labs(y = "", x = "% Members on Committees") +
  scale_fill_viridis(discrete = TRUE, begin = 0.1, end = 0.8, option = "plasma")
```


## 4.3 Town Membership Patterns Table

```{r, message=FALSE, warning = FALSE}
#install.packages("flextable")
library(flextable)
library(officer)

tab <- read_csv("data/committees.csv") %>%
  # Get all distinct towns with their geography and level of government specified
  select(town, geography, level) %>%
  distinct() %>%
  # Capitalize names
  mutate_at(vars(geography, level), funs(paste(str_sub(., 0,1) %>% toupper(), 
                                               str_sub(., 2, -1), sep = ""))) %>%
  # Join in Diversity
  left_join(by = "town",
            y = read_rds("data/matrix.rds") %>%
              get_diversity() %>%
              filter(level == "town") %>%
              select(town = unit, diversity = share, -level, -type) ) %>%
  
  # Join in Percentages
  left_join(by = "town",
            y = read_rds("data/matrix.rds") %>%
              get_town() %>%
              pivot_wider(
                id_cols = town,
                names_from = type,
                values_from = share,
                values_fill = list(share = 0))) %>%
  # Fix and translate names
  mutate(town = town %>% 
           str_replace_all(c("-shi" = " City",
                             "-cho" = " Town",
                             "-son" = " Village",
                             "-mura" = " Village",
                             "-machi" = " Town")) %>%
           recode("National" = "National Committees",
                  "Miyagi-ken" = "Miyagi Prefectural Committees",
                  "Iwate-ken" = "Iwate Prefectural Committees")) %>%
  arrange(desc(geography), desc(level)) %>%
  # Multiply percentages by 100 and round
  mutate_at(vars(-town, -geography, -level, -diversity), funs(round(. * 100, 0)))

```



```{r}
tab %>%
  # Select columns
  flextable(
    col_keys = c("geography", "level", "town", 
                 "diversity", "community_participation_relig", "social_org", "influential_citizen",
                 "religious", "gender", "business", "agriculture", "fisheries", "politician",
                 "govt", "research", "health_care", "emergency_services", "media")) %>%
  # Make a cool visual representation of number of interest groups
  compose(j = ~diversity,
          value = as_paragraph(
            linerange(value = diversity, max = max(diversity), rangecol = "grey", stickcol = "steelblue")),
          part = "body") %>%
  # Set column labels
  set_header_labels(
    "geography" = "Prefecture",
    "level" = "Level",
    "town" = "Locale",
    "diversity" = "# Interest Groups",
    "community_participation_relig" = "Community Participation",
    "social_org" = "NGOs",
    "influential_citizen" = "Influential Citizens",
    "religious" = "Religious Orgs",
    # Gender
    "gender" = "Women",
    # Business
    "business" = "Business",
    "agriculture" = "Agriculture",
    "fisheries" = "Fisheries",
    "politician" = "Elected Officials",
    "govt" = "Government Bureaucrats",
    "research" = "Engineers & Researchers",
    "health_care" = "Health Care",
    "emergency_services" = "Emergency Services",
    "media" = "Media") %>%
  # Bold percentages over 30
  bold(~ community_participation_relig > 30, ~ community_participation_relig) %>%
  bold(~ social_org > 30, ~ social_org) %>%
  bold(~ influential_citizen > 30, ~ influential_citizen) %>%
  bold(~ religious > 30, ~ religious) %>%
  bold(~ gender > 30, ~ gender) %>%
  bold(~ business > 30, ~ business) %>%
  bold(~ agriculture > 30, ~ agriculture) %>%
  bold(~ fisheries > 30, ~ fisheries) %>%
  bold(~ politician > 30, ~ politician) %>%
  bold(~ govt > 30, ~ govt) %>%
  bold(~ research > 30, ~ research) %>%
  bold(~  health_care > 30, ~ health_care ) %>%
  bold(~ emergency_services > 30, ~ emergency_services) %>%
  bold(~ media > 30, ~ media) %>%
  footnote(i = 1, j = ~community_participation_relig,
          value = as_paragraph(
              c("Numbers represent % of members in a committee from each interest group. Percentage is bold where over 30% of members are from one interest group.")), ref_symbols = c("*"), part = "header") %>%
  valign(valign = "bottom", part = "header") %>%
  autofit()
```


## 4.4 Models

```{r}
# Model Diversity of Sources of Involvement

# You'd think you need a poisson, since it's a count, but actually, 
# the residuals are normally distributed and the Breusch Pagan Test does not 
# show statistically significant heteroskedasticity
# so, OLS is fine

#m5 <- dat %>%
#  glm(formula = diversity ~ 
#       community_participation_relig + gender +
#        govt + politician + research + business + agriculture + fisheries, 
### Excluding these
###       health_care + media + emergency_services, 
#      family = "poisson")
#
#ggplot(dat,mapping = aes(x=m5$residuals)) +
#  geom_histogram()

m1 <- dat %>%
  lm(formula = diversity ~ 
       social_org + influential_citizen + religious + gender +
        govt + politician + research + business + agriculture + fisheries +
       #health_care + media + emergency_services + 
       geography + municipal)

# Model Participation of Women
m2 <- dat %>%
  lm(formula = gender ~ govt + politician + research + 
       social_org + influential_citizen + religious + 
       business + agriculture + fisheries +
       #health_care + media + emergency_services + 
       geography + municipal) 


# Model Community Participation via Social Orgs/NGos
m3 <- dat %>%
  lm(formula = social_org ~ govt + politician + research +
       business + agriculture + fisheries + gender + 
       religious + influential_citizen +
       #health_care + media + emergency_services + 
       geography + municipal) 

# Model Community PArticipation via Social Orgs/NGOs & Influential Citizens
m4 <- dat %>%
  lm(formula = community_participation ~ govt + politician + research +
       business + agriculture + fisheries + gender + religious +
       #health_care + media + emergency_services + 
       geography + municipal) 

# Model Community Participation via Social Orgs, NGOs, Influential Citizens, or Religious orgs
m5 <- dat %>%
  lm(formula = community_participation_relig ~ govt + politician + research +
       business + agriculture + fisheries + gender +
       #health_care + media + emergency_services + 
       geography + municipal) 

# install.packages("pscl")
# install.packages("lmtest")

texreg::htmlreg(
  list(m1,m2,m3,m4,m5),
  bold = 0.10, 
  file = "table1.html",
  stars = c(0.001, 0.01, 0.05, 0.10), 
  custom.header = list(
    "Diversity of Interests" = 1,
    "Gender Representation" = 2,
    "% Community Representation" = 3:5),
  custom.model.names = c("# Interest Groups", "% Women", 
                         "% NGOs", "% NGOs, Citizens", 
                         "% NGOs, Citizens, Religious Orgs"),
  custom.coef.map = list(
    # Community Participation
    "social_org" = "% NGOs",
    "influential_citizen" = "% Influential Citizens",
    "religious" = "% Religious Orgs",
    # Gender
    "gender" = "% Women",
    # Business
    "business" = "% Business",
    "agriculture" = "% Agriculture",
    "fisheries" = "% Fisheries",
    "politician" = "% Elected Officials",
        "govt" = "% Government Bureaucrats",
    "research" = "% Engineers & Researchers",
    "health_care" = "% Health Care",
    "emergency_services" = "% Emergency Services",
    "media" = "% Media",
    "geographymiyagi" = "Miyagi Committee",
    "geographynational" = "National Committee",
    "municipalmunicipal" = "Municipal Committee"),
  groups = list(
    "<b>Community Representation</b>" = 1:3,
    "<b>Gender Representation</b>" = 4,
    "<b>Business Interests</b>" = 5:7,
    "<b>Elected Officials</b>" = 8,
    "<b>Government Bureaucrats and Planners</b>" = 9,
    "<b>Experts</b>" = 10,
    "<b>Committee Type<b/>" = 11:13),
  include.fstatistic = TRUE)


remove(m1,m2,m3,m4,m5)
```

### 4.4.1 Zelig
```{r}

```



# 5. Member Connectivity

## 5.1 Traits of Top Connectors

```{r}
g <- read_rds("data/edgelist_members.rds") %>% 
  filter(weight > 0) %>%
  tbl_graph(nodes = read_csv("data/members.csv"), directed = FALSE) %>%
  activate(nodes) %>%
  mutate(
    between = centrality_betweenness(
      weights = weight,
      directed = FALSE,
      normalized = FALSE),
    strength = centrality_degree(weights = weight, mode = "total", loops = FALSE) / 2) %>%
  select(member_id, between, strength,
         community_participation_relig, social_org, influential_citizen, religious, gender,
         business, agriculture, fisheries, govt, politician, research) %>%
  tidygraph::as_tibble()


dat <- data.frame(
  measure = c("community_participation_relig", "social_org", "influential_citizen", "religious", "gender",
              "business", "agriculture", "fisheries", "govt", "politician", "research")) %>%
  # Join in member trait shares overall percentile
  left_join(by = "measure",
            y = g %>%
              pivot_longer(
                cols = -c(member_id, between, strength),
                names_to = "measure",
                values_to = "value") %>% 
              group_by(measure) %>%
              summarize(share_0 = (sum(value %in% c("yes", "woman")) / n()) %>%
                          round(2))) %>%
  
  # Join in member trait shares for the 90th percentile
  left_join(by = "measure",
            y = g %>%
              filter(between >= quantile(between, 0.90),
                     strength >= quantile(strength, 0.90)) %>%
              pivot_longer(
                cols = -c(member_id, between, strength),
                names_to = "measure",
                values_to = "value") %>% 
              group_by(measure) %>%
              summarize(share_90 = (sum(value %in% c("yes", "woman")) / n()) %>%
                          round(2))) %>%
  # Join in member trait shares for the 95th percentile
  left_join(by = "measure",
            y = g %>%
              filter(between >= quantile(between, 0.95),
                     strength >= quantile(strength, 0.95)) %>%
              pivot_longer(
                cols = -c(member_id, between, strength),
                names_to = "measure",
                values_to = "value") %>% 
              group_by(measure) %>%
              summarize(share_95 = (sum(value %in% c("yes", "woman")) / n()) %>%
                          round(2))) %>%
  # Join in member trait shares for the 99th percentile
  left_join(by = "measure",
            y = g %>%
              filter(between >= quantile(between, 0.99),
                     strength >= quantile(strength, 0.99)) %>%
              
              pivot_longer(
                cols = -c(member_id, between, strength),
                names_to = "measure",
                values_to = "value") %>% 
              group_by(measure) %>%
              summarize(share_99 = (sum(value %in% c("yes", "woman")) / n()) %>%
                          round(2))) %>%
  # Pivot for easy visualization
  pivot_longer(
    cols = -measure,
    names_to = "percentile", 
    names_prefix = "share_",
    values_to = "share")

dat %>%
  mutate(percentile = percentile %>% recode_factor(
    "0" = "Overall",
    "90" = "90%",
    "95" = "95%",
    "99" = "99%"),
    measure = measure %>% recode_factor(
      "community_participation_relig" = "Citizens, NGOs, & Religious Orgs",
      "community_participation" = "Citizens & NGOs",
      "social_org" = "NGOs",
      "influential_citizen" = "Influential Citizens",
      "religious" = "Religious Orgs",
      "gender" = "Women",
      "fisheries" = "Fisheries",
        "agriculture" = "Agriculture",
      "business" = "Business",
      "govt" = "Government Bureaucrats",
      "politician" = "Elected Officials",
      "research" = "Engineers & Researchers")) %>%
  ggplot(mapping = aes(y = measure, x = percentile, fill = share * 100, label = share * 100)) +
  geom_tile() +
  geom_text(color = "white") +
  theme_classic() +
  theme(panel.background = element_rect(fill = "white", color = NA),
        panel.border = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 0.5)) +
  labs(x = "Percentile by Connectivity & Bridging Capacity", y= "",
       fill = "% of Members",
       caption = "Connectivity refers to weighted degree, how many members share a committee with the person in question.\nBridging capacity refers to weighted betweenness, the number of shortest paths crossing through that member.") +
  scale_fill_gradient(low = "grey", high = "black")
```

## 5.2 Average Connectivity 


### 5.2.1 Moran's I

```{r, message = FALSE, warning = FALSE}
# Get node outcome
dat <- read_csv("data/members_imputed.csv") %>%
  select(member_id, community_participation_relig, community_participation, 
         social_org, influential_citizen, religious, gender,
         research, govt, politician, business, agriculture, fisheries,
         health_care, emergency_services, media) %>%
  mutate_at(vars(-member_id),
            funs(if_else(. %in% c("yes", "woman"), 1, 0))) %>%
  # Pivot longer
  pivot_longer(
    cols = -member_id,
    names_to = "type",
    values_to = "value")  %>%
  arrange(member_id)

```

```{r, message = FALSE, warning = FALSE, eval = FALSE}
get_many_moran = function(data){
  # Print number of permutations completed
  print(paste(data$replicate[1]))
  
  # Generate member coaffiliation network edgelist
  # matrix of weights
  
  temp <- expand_grid(
    member_id = read_csv("data/members.csv")$member_id,
    committee_id = read_csv("data/committees.csv")$committee_id) %>%
    # Join in edges and weights
    left_join(by = c("member_id", "committee_id"),
              # Join in temporary edgelist,
              # from permutation
              y = data %>%
                filter(weight > 0) %>%
                select(member_id, committee_id, weight)) %>%
    mutate(weight = if_else(is.na(weight), 
                            true = 0, 
                            false = as.numeric(weight))) %>%
    # pivot into a wide incidence matrix
    pivot_wider(
      id_cols = member_id,
      names_from = committee_id,
      values_from = weight) %>%
    tibble::column_to_rownames(var = "member_id") %>%
    as.matrix() 
  
  # Generate member coaffiliation network edgelist
  wm <- temp %*% t(temp) %>%
    # remove the diagonal
    diag.remove(remove.val = 0)   
  
  remove(temp)
  
  get_moran = function(var){
    # Source:
    # https://rspatial.org/raster/analysis/3-spauto.html
    
    # Get outcome
    y <- dat %>%
      filter(type == var) %>%
      select(value) %>% unlist()
    
    # Get number of observations
    n <- length(y)
    
    # Get mean of node outcome
    ybar <- mean(y, na.rm = TRUE)
    
    # Difference in expected vs. actual
    dy <- y - ybar
    
    # Obtain (yi-ybar)(yj-ybar) for all pairs
    yi <- rep(dy, each = n)
    yj <- rep(dy)
    yiyj <- yi * yj
    
    # Make a matrix of the multiplied pairs
    pm <- matrix(yiyj, ncol = n)
    
    # Multiple matrix of difference in expected outcome by network matrix
    pmw <- pm * wm
    
    # Obtain sum of network-weighted matrix of difference in expected vs. actual outcomes
    spmw <- sum(pmw)
    
    # Divide value by sum of weights
    smw <- sum(wm)
    # Normalize by sum of weights
    sw <- spmw / smw
    
    # Compute inverse variance of y
    vr <- n / sum(dy^2)
    
    # Calculate Moran's I
    data.frame(stat = vr * sw,
               measure = "Moran's I",
               type = var) %>%
      return()
    
  }
  
  # Run function to get Moran's I
  c("community_participation_relig", "community_participation", 
    "social_org", "influential_citizen", "religious", "gender",
    "research", "govt", "politician", "business", "agriculture", "fisheries",
    "health_care", "emergency_services", "media") %>%
    lapply(get_moran) %>%
    bind_rows() %>%
    return()
  
  
}

read_rds("perm/block_perm.rds") %>%
  split(.$replicate) %>%
  map_dfr(. %>% get_many_moran, .id = "replicate") %>%
  saveRDS("perm/null_moran.rds")
```

```{r, message = FALSE, warning = FALSE}
moran <- read_rds("data/matrix.rds") %>%
  # Get Moran's I statistics for observed network
  get_many_moran() %>%
  # Join in Moran's I statistics from permuted networks
  left_join(by = "type",
            y = read_rds("perm/null_moran.rds") %>%
              select(type, permuted = stat)) %>%
  group_by(measure, type) %>%
  
    mutate(permuted = permuted - mean(permuted),
         stat_zero = stat - mean(permuted)) %>%
  # Now in how many cases was the observed above the permuted?
  summarize(
    moran = mean(stat),
    moran_p_value = sum(abs(stat_zero) <= abs(permuted)) / n())
remove(dat)
```

### 5.2.2 Mean Centrality

```{r, message = FALSE, warning = FALSE}
# Using an edgelist supplied,
# create a graph object

get_mean_centrality = function(data){
  
  print(paste(data$replicate[1]))
  
  # Using original/permuted member-committee edglist,
  # Get an incidence matrix
  temp <- expand_grid(
    member_id = read_csv("data/members.csv")$member_id,
    committee_id = read_csv("data/committees.csv")$committee_id) %>%
    # Join in edges and weights
    left_join(by = c("member_id", "committee_id"),
              # Join in temporary edgelist,
              # from permutation
              y = data %>%
                filter(weight > 0) %>%
                select(member_id, committee_id, weight)) %>%
    mutate(weight = if_else(is.na(weight), 
                            true = 0, 
                            false = as.numeric(weight))) %>%
    # pivot into a wide incidence matrix
    pivot_wider(
      id_cols = member_id,
      names_from = committee_id,
      values_from = weight) %>%
    tibble::column_to_rownames(var = "member_id") %>%
    as.matrix() 
  
  # Generate member coaffiliation network edgelist
  temp <- temp %*% t(temp) %>%
    # remove the diagonal
    diag.remove(remove.val = 0) %>%
    as.data.frame() %>%
    tibble::rownames_to_column(var = "from") %>%
    # Turn into edgelist
    pivot_longer(
      cols = -from,
      names_to = "to",
      values_to = "weight") %>%
    filter(weight > 0)
  
  temp %>%
    tbl_graph(nodes = read_csv("data/members.csv"), directed = FALSE) %>%
    activate(nodes) %>%
    # Calculate member centrality
    mutate(
      between = centrality_betweenness(
        weights = weight,
        directed = FALSE,
        normalized = FALSE),
      strength = centrality_degree(weights = weight, 
                                   mode = "total", loops = FALSE) / 2) %>%
    # Gather key variables
    select(member_id, between, strength,
           community_participation_relig, community_participation, 
           social_org, influential_citizen, religious, gender,
           research, govt, politician, business, agriculture, fisheries,
           health_care, emergency_services, media) %>%
    # Make a dataframe
    tidygraph::as_tibble() %>%
    # Now pivot out for easy calculation
    pivot_longer(
      cols = -c(member_id, between, strength),
      names_to = "type",
      values_to = "value") %>%
    # Get just members who were identified with that trait
    filter(value %in% c("yes", "woman")) %>%
    # For each trait,
    # calculate the mean weighted degree and betweenness 
    # among members with this trait
    group_by(type) %>%
    summarize(
      between = mean(between, na.rm = TRUE),
      strength = mean(strength, na.rm = TRUE)) %>%
    return()
}

system.time(
  # Now generated a permuted null distribution
  # of mean centrality statistics
  read_rds("perm/block_perm.rds") %>%
    split(.$replicate) %>%
    map_dfr(. %>% get_mean_centrality, .id = "replicate")  %>%
    saveRDS("perm/null_mean_centrality.rds")
)
```

```{r, message = FALSE, warning = FALSE}
# Gather p-values for centrality by member type
centrality <- read_rds("data/matrix.rds") %>%
  # Get centrality measures by node types
  get_mean_centrality() %>%
  # Join in statistics from permuted networks
  left_join(by = "type",
            y = read_rds("perm/null_mean_centrality.rds") %>%
              rename(between_perm = between, strength_perm = strength)) %>%
  # Calculate p_values
  group_by(type) %>%
    # How far from the mean are you?
  # If positive,  you are above it
  # If negative, you are below it
  mutate(between_perm = between_perm - mean(between_perm),
         between_zero = between - mean(between_perm),
         
         strength_perm = strength_perm - mean(strength_perm),
         strength_zero = strength - mean(strength_perm)) %>%
  # Now in how many cases was the observed above the permuted?
  summarize(
    between = mean(between),
    between_p_value = sum(abs(between_zero) <= abs(between_perm)) / n(),
    strength = mean(strength),
    strength_p_value = sum(abs(strength_zero) <= abs(strength_perm)) / n())

```

### 5.2.3 Mean Committees

```{r, message = FALSE, warning = FALSE}
# Gather the mean number of committees a person is connected to
committee <- read_rds("data/matrix.rds") %>% 
  filter(weight > 0) %>%
  get_meandegree() %>%
  filter(measure %in% c("yes", "woman")) %>%
  select(-measure) %>%
  # Joint in permuted measures
  left_join(by = "type",
            y = read_rds("perm/null_meandegree.rds") %>%
              filter(measure %in% c("yes", "woman")) %>%
              select(replicate, type, permuted = mwdeg)) %>%
  # I don't ACTUALLY need to standardize it, I just need to mean center it at 0.
  group_by(type) %>%
  # How far from the mean are you?
  # If positive,  you are above it
  # If negative, you are below it
  mutate(permuted = permuted - mean(permuted),
         stat_zero = mwdeg - mean(permuted)) %>%
  # Now in how many cases was the observed above the permuted?
  summarize(mwdeg = mean(mwdeg),
            mwdeg_p_value =  sum(abs(stat_zero) <= abs(permuted)) / n())

```


### 5.2.4 Connectivity Table

```{r}
# Join together measures of connectivity
committee %>%
  left_join(by = "type",
            y = centrality) %>%
  left_join(by = "type",
            y = moran ) %>% ungroup() %>%
# Relabel types
  mutate(type = type %>% recode_factor(
    "community_participation_relig" = "Citizens, NGOs, & Religious Orgs",
    "community_participation" = "Citizens & NGOs",
    "social_org" = "NGOs",
    "influential_citizen" = "Influential Citizens",
    "religious" = "Religious Orgs",
    "gender" = "Women",
    "business" = "Business",
    "agriculture" = "Agriculture",
    "fisheries" = "Fisheries",
    "govt" = "Government Bureaucrats",
    "politician" = "Elected Officials",
    "research" = "Engineers & Researchers",
    "health_care" = "Health Care",
    "emergency_services" = "Emergency Services",
    "media" = "Media")) %>%
  mutate_at(vars(mwdeg, strength, between, moran),
            funs(round(., 2))) %>%
  mutate_at(vars(mwdeg_p_value, strength_p_value, 
                 between_p_value, moran_p_value),
            funs(gtools::stars.pval(.))) %>%
  arrange(type) %>%
  # Make a table
  flextable(
    col_keys = c("type", "mwdeg", "mwdeg_p_value",
                 "strength", "strength_p_value",
                 "between", "between_p_value",
                 "moran", "moran_p_value")) %>%
  # Set column labels
  set_header_labels(
    "type" = "Interest Group",
    "mwdeg" = "Avg. # of Committees",
    "mwdeg_p_value" = "p-value",
    "strength" = "Avg. Member Ties",
    "strength_p_value" = "p-value",
    "between" = "Avg. Bridging Capacity",
    "between_p_value" = "p-value",
    "moran" = "Network Autocorrelation",
    "moran_p_value" = "p-value") %>%
    # Add bolding
    bold(~ mwdeg_p_value %in% c("***", "**", "*", "."), ~ mwdeg) %>%
    bold(~ strength_p_value %in% c("***", "**", "*", "."), ~ strength) %>%
    bold(~ between_p_value %in% c("***", "**", "*", "."), ~ between) %>%
    bold(~ moran_p_value %in% c("***", "**", "*", "."), ~ moran) %>%
   
   # Add interpretation of columsn
 footnote(i = 1, j = ~mwdeg,
          value = as_paragraph(
              c("Depicts on average, how many committees members from that interest group participated in.")), 
          ref_symbols = c("a"), part = "header") %>%
 footnote(i = 1, j = ~strength,
          value = as_paragraph(
              c("Depicts on average, how many members sat on the same committee as members from that interest group")), 
          ref_symbols = c("b"), part = "header") %>%
     footnote(i = 1, j = ~between,
          value = as_paragraph(
              c("Depicts on average, how many shortest paths through the network pass through members from that interest group")), 
          ref_symbols = c("c"), part = "header") %>%
    
     footnote(i = 1, j = ~moran,
          value = as_paragraph(
              c("Depicts Moran's I, how much members from the same interest group are connected (-1 = heterophily, 1 = homophily).")), 
          ref_symbols = c("d"), part = "header") %>%
    # Add interpretation of p-values
  footnote(i = 1, j = ~moran_p_value,
          value = as_paragraph(
              c("*** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.10. P-values calculated from permutations of bipartite network and resulting traits.")), 
          ref_symbols = c("e"), part = "header") %>%
  valign(valign = "bottom", part = "header") %>%
    set_caption(caption = "Connectivity of Interest Groups") %>%
  autofit()

```

```{r}
remove(centrality, committee, moran)

```



## 5.3 Difference in Connectivity

```{r,message = FALSE, warning = FALSE}
# Here's the function for getting average number of committees again

# Average number of different committees a person is connected to, 
# based on their demographics

get_meandegree = function(data){
  
  print(paste(data$replicate[1]))
  
  temp <- data %>%
    # Identify memberships
    filter(weight > 0) %>%
    select(member_id, committee_id) %>%
    left_join(by = "committee_id", y = read_csv("data/committees.csv") %>%
                select(committee_id, town)) %>%
    # Join in member traits
    left_join(by = "member_id", y = read_csv("data/members.csv"))
  
  bind_rows(
    
    
    
    temp %>%
      group_by(member_id, community_participation) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = community_participation) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "community_participation"),
    
        
    temp %>%
      group_by(member_id, community_participation_relig) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = community_participation_relig) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "community_participation_relig"),
    
    
    temp %>%
      group_by(member_id, research) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = research) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "research"),
    
    temp %>%
      group_by(member_id, gender) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = gender) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "gender"),
    
    
    temp %>%
      group_by(member_id, govt) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = govt) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "govt"),
    
    
    temp %>%
      group_by(member_id, politician) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = politician) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "politician"),
    
    temp %>%
      group_by(member_id, business) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = business) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "business"),
    
    
    temp %>%
      group_by(member_id, agriculture) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = agriculture) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "agriculture"),
    
    
    temp %>%
      group_by(member_id, fisheries) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = fisheries) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "fisheries"),
    
    
    
    temp %>%
      group_by(member_id, social_org) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = social_org) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "social_org"),
    
    
    temp %>%
      group_by(member_id, influential_citizen) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = influential_citizen) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "influential_citizen"),
    
    
    temp %>%
      group_by(member_id, religious) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = religious) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "religious"),
    
    temp %>%
      group_by(member_id, health_care) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = health_care) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "health_care"),
    
    
    temp %>%
      group_by(member_id, emergency_services) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = emergency_services) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "emergency_services"),
    
    
    temp %>%
      group_by(member_id, media) %>%
      summarize(committees = n()) %>%
      ungroup() %>%
      group_by(measure = media) %>%
      summarize(mwdeg = mean(committees, na.rm = TRUE)) %>%
      mutate(type = "media")
  ) %>%
    return()
}
```

### 5.3.1 Mean Committees

```{r, message = FALSE, warning = FALSE}
# Calculate difference in average connectivity between key groups
committee <- read_rds("data/matrix.rds") %>% 
  filter(weight > 0) %>%
  get_meandegree() %>%
  filter(measure %in% c("yes", "woman")) %>%
  select(-measure) %>%
  pivot_wider(
    id_cols = c(),
    names_from = type,
    values_from = mwdeg) %>%
  summarize(
    diff_com_gender = community_participation_relig - gender,
    diff_com_research = community_participation_relig - research,
    diff_com_govt = community_participation_relig - govt,
    diff_com_politician = community_participation_relig - politician,
    diff_com_business = community_participation_relig - business,
    diff_com_agriculture = community_participation_relig - agriculture,
    diff_com_fisheries = community_participation_relig - fisheries,
    diff_com_health_care = community_participation_relig - health_care,
    diff_com_emergency_services = community_participation_relig - emergency_services,
    diff_com_media = community_participation_relig - media) %>%
  pivot_longer(
    cols = -c(),
    names_to = "measure",
    values_to = "stat") %>%
  # Join in permuted statistics
  left_join(by = "measure",
            y =  read_rds("perm/null_meandegree.rds") %>% 
              filter(measure %in% c("yes", "woman")) %>%
              select(-measure) %>%
              pivot_wider(
                id_cols = c(replicate),
                names_from = type,
                values_from = mwdeg) %>%
              group_by(replicate) %>%
              summarize(
                diff_com_gender = community_participation_relig - gender,
                diff_com_research = community_participation_relig - research,
                diff_com_govt = community_participation_relig - govt,
                diff_com_politician = community_participation_relig - politician,
                diff_com_business = community_participation_relig - business,
                diff_com_agriculture = community_participation_relig - agriculture,
                diff_com_fisheries = community_participation_relig - fisheries,
                diff_com_health_care = community_participation_relig - health_care,
                diff_com_emergency_services = community_participation_relig - emergency_services,
                diff_com_media = community_participation_relig - media) %>%
              pivot_longer(
                cols = -replicate,
                names_to = "measure",
                values_to = "permuted")) %>%
  group_by(type = measure) %>%
  mutate(permuted = permuted - mean(permuted, na.rm = TRUE),
         stat_zero = stat - mean(permuted, na.rm = TRUE)) %>%
  summarize(
    measure = "diff_com",
    stat = mean(stat),
    p_value = sum(abs(stat_zero) <= abs(permuted)) / n())

```

### 5.3.2 Mean Centrality

```{r}
# Gather p-values for centrality by member type
obs <- read_rds("data/matrix.rds") %>%
  # Get centrality measures by node types
  get_mean_centrality() %>%
  # Pivot long, so we can calculate many at once
  pivot_longer(
    cols = -type,
    names_to = "measure",
    values_to = "stat") %>%
  # Pivot wider, to get a column for each type
  pivot_wider(
    id_cols = measure,
    names_from = type,
    values_from = stat) %>%
  # Calculate difference in connectivity
  # members who represent community interests versus the a different interest group
    group_by(measure) %>%
  summarize(
    diff_com_gender = community_participation_relig - gender,
    diff_com_research = community_participation_relig - research,
    diff_com_govt = community_participation_relig - govt,
    diff_com_politician = community_participation_relig - politician,
    diff_com_business = community_participation_relig - business,
    diff_com_agriculture = community_participation_relig - agriculture,
    diff_com_fisheries = community_participation_relig - fisheries,
    diff_com_health_care = community_participation_relig - health_care,
    diff_com_emergency_services = community_participation_relig - emergency_services,
    diff_com_media = community_participation_relig - media) %>%
  # Pivot longer for tidy calculations
  pivot_longer(
    cols = -c(measure),
    names_to = "type",
    values_to = "stat") 


  
null <- read_rds("perm/null_mean_centrality.rds") %>%
 # Pivot long, so we can calculate many at once
  pivot_longer(
    cols = -c(replicate, type),
    names_to = "measure",
    values_to = "stat") %>%
  # Pivot wider, to get a column for each type
  pivot_wider(
    id_cols = c(replicate, measure),
    names_from = type,
    values_from = stat) %>%
  # Calculate difference in connectivity
  # members who represent community interests versus the a different interest group
    group_by(replicate, measure) %>%
  summarize(
    diff_com_gender = community_participation_relig - gender,
    diff_com_research = community_participation_relig - research,
    diff_com_govt = community_participation_relig - govt,
    diff_com_politician = community_participation_relig - politician,
    diff_com_business = community_participation_relig - business,
    diff_com_agriculture = community_participation_relig - agriculture,
    diff_com_fisheries = community_participation_relig - fisheries,
    diff_com_health_care = community_participation_relig - health_care,
    diff_com_emergency_services = community_participation_relig - emergency_services,
    diff_com_media = community_participation_relig - media) %>%
  # Pivot longer for tidy calculations
  pivot_longer(
    cols = -c(replicate, measure),
    names_to = "type",
    values_to = "stat") 

centrality <- obs %>%
# Join in statistics from permuted networks
  left_join(by = c("measure", "type"),
            y = null %>%
              rename(permuted = stat)) %>% 
  group_by(measure, type) %>%
  mutate(permuted = permuted - mean(permuted, na.rm = TRUE),
    stat_zero = stat - mean(permuted, na.rm = TRUE))  %>%
  summarize(
    stat = mean(stat, na.rm = TRUE),
    p_value = sum(abs(stat_zero) <= abs(permuted)) / n() )
remove(obs, null)
```

### 5.3.3 Moran's I

```{r}
# Get node outcome
dat <- read_csv("data/members_imputed.csv") %>%
  select(member_id, community_participation_relig, community_participation, 
         social_org, influential_citizen, religious, gender,
         research, govt, politician, business, agriculture, fisheries,
         health_care, emergency_services, media) %>%
  mutate_at(vars(-member_id),
            funs(if_else(. %in% c("yes", "woman"), 1, 0))) %>%
  # Pivot longer
  pivot_longer(
    cols = -member_id,
    names_to = "type",
    values_to = "value")  %>%
  arrange(member_id)

# Now let's also grab these differences for Moran's I
moran <- read_rds("data/matrix.rds") %>%
  # Get Moran's I statistics for observed network
  get_many_moran() %>%
  pivot_wider(
    id_cols = c(measure),
    names_from = type,
    values_from = stat) %>%
  # Calculate difference in network autocorrelation for 
  # members who represent community interests versus the a different interest group
  summarize(
    diff_com_gender = community_participation_relig - gender,
    diff_com_research = community_participation_relig - research,
    diff_com_govt = community_participation_relig - govt,
    diff_com_politician = community_participation_relig - politician,
    diff_com_business = community_participation_relig - business,
    diff_com_agriculture = community_participation_relig - agriculture,
    diff_com_fisheries = community_participation_relig - fisheries,
    diff_com_health_care = community_participation_relig - health_care,
    diff_com_emergency_services = community_participation_relig - emergency_services,
    diff_com_media = community_participation_relig - media) %>%
  # Pivot longer for tidy calculations
  pivot_longer(
    cols = -c(),
    names_to = "type",
    values_to = "stat") %>%
  
  # Now, gather and join in the permuted differences in network autocorrelation too,
  # so that we can eventually calculate p-value
  
  left_join(by = "type",
            y = read_rds("perm/null_moran.rds") %>%
              # Get Moran's I statistics for observed network
              pivot_wider(
                id_cols = c(replicate,measure),
                names_from = type,
                values_from = stat) %>%
              # Calculate difference in network autocorrelation for 
              # members who represent community interests versus the a different interest group
              group_by(replicate) %>%
              summarize(
                diff_com_gender = community_participation_relig - gender,
                diff_com_research = community_participation_relig - research,
                diff_com_govt = community_participation_relig - govt,
                diff_com_politician = community_participation_relig - politician,
                diff_com_business = community_participation_relig - business,
                diff_com_agriculture = community_participation_relig - agriculture,
                diff_com_fisheries = community_participation_relig - fisheries,
                diff_com_health_care = community_participation_relig - health_care,
                diff_com_emergency_services = community_participation_relig - emergency_services,
                diff_com_media = community_participation_relig - media) %>%
              pivot_longer(
                cols = -c(replicate),
                names_to = "type",
                values_to = "stat") %>%
              rename(permuted = stat)) %>%
  # Calculate p-value
  group_by(type) %>%
  mutate(permuted = permuted - mean(permuted, na.rm = TRUE),
         stat_zero = stat - mean(permuted, na.rm = TRUE)) %>%
  summarize(
    measure = "moran",
    stat = mean(stat),
    p_value = sum(abs(stat_zero) <= abs(permuted)) / n())

remove(dat)
```

### 5.5.4 Difference Table

```{r}

library(flextable)
library(officer)
bind_rows(committee, centrality, moran) %>%
  pivot_wider(
    id_cols = type,
    names_from = measure,
    values_from = c(stat, p_value)) %>%
  mutate(type = type %>% recode_factor(
    "diff_com_business" = "Business",
    "diff_com_agriculture" = "Agriculture",
    "diff_com_fisheries" = "Fisheries",
    "diff_com_govt" = "Government Bureaucrats",
    "diff_com_politician" = "Elected Officials",
    "diff_com_research" = "Engineers & Researchers",
    "diff_com_health_care" = "Health Care",
    "diff_com_emergency_services" = "Emergency Services",
    "diff_com_media" = "Media",
    "diff_com_gender" = "Women")) %>%
  mutate_at(vars(stat_diff_com, stat_moran, stat_between, stat_strength),
            funs(round(., 2))) %>%
  mutate_at(vars(p_value_diff_com, p_value_moran, 
                 p_value_between, p_value_strength),
            funs(gtools::stars.pval(.))) %>%
  arrange(type) %>%
  # Make a table
  flextable(
    col_keys = c("type", "stat_diff_com", "p_value_diff_com",
                 "stat_strength", "p_value_strength",
                 "stat_between", "p_value_between",
                 "stat_moran", "p_value_moran")) %>%
  # Set column labels
  set_header_labels(
    "type" = "Interest Group",
    "stat_diff_com" = "Difference in\nAvg. # of Committees",
    "p_value_diff_com" = "p-value",
    "stat_strength" = "Difference in\nAvg. Member Ties",
    "p_value_strength" = "p-value",
    "stat_between" = "Difference in\nBridging Capacity",
    "p_value_between" = "p-value",
    "stat_moran" = "Difference in \nNetwork Autocorrelation",
    "p_value_moran" = "p-value") %>%
  # Add bolding
  bold(~ p_value_diff_com %in% c("***", "**", "*", "."), ~ stat_diff_com) %>%
  bold(~ p_value_moran %in% c("***", "**", "*", "."), ~ stat_moran) %>%
  bold(~ p_value_strength %in% c("***", "**", "*", "."), ~ stat_strength) %>%
  bold(~ p_value_between %in% c("***", "**", "*", "."), ~ stat_between) %>%
  # Add color for direction
  color(~stat_diff_com > 0, ~ stat_diff_com, color = "steelblue") %>%
  color(~stat_diff_com < 0, ~ stat_diff_com, color = "firebrick") %>%
  color(~stat_moran > 0, ~ stat_moran, color = "steelblue") %>%
  color(~stat_moran < 0, ~ stat_moran, color = "firebrick") %>%
  color(~stat_strength > 0, ~ stat_strength, color = "steelblue") %>%
  color(~stat_strength < 0, ~ stat_strength, color = "firebrick") %>%
  color(~stat_between > 0, ~ stat_between, color = "steelblue") %>%
  color(~stat_between < 0, ~ stat_between, color = "firebrick") %>%
 
   # Add interpretation of interest groups
  footnote(i = 1, j = ~type,
          value = as_paragraph(
              c("Depicts difference in connectivity for members if they represented NGOs, influential citizens, or religious orgs compared to the specific interest group.")), 
          ref_symbols = c("a"), part = "header") %>%
  # Add interpretation of avg. committees
 footnote(i = 1, j = ~stat_diff_com,
          value = as_paragraph(
              c("Depicts on average, how many more/fewer committees members participated in.")), 
          ref_symbols = c("b"), part = "header") %>%
  # Add interpretation of strength
 footnote(i = 1, j = ~stat_strength,
          value = as_paragraph(
              c("Depicts on average, how many more/fewer ties to other members did members of this interest group have.")), 
          ref_symbols = c("c"), part = "header") %>%
  # Add interpretation of betweenness
   footnote(i = 1, j = ~stat_between,
          value = as_paragraph(
              c("Depicts on average, how many more/fewer shortest paths through the network went through members of this interest group.")), 
          ref_symbols = c("d"), part = "header") %>%
  # Add interpretation of moran's I
   footnote(i = 1, j = ~stat_moran,
          value = as_paragraph(
              c("Depicts difference in network autocorrelation, how much members of the same type are connected to each other.")), 
          ref_symbols = c("e"), part = "header") %>%
    # Add interpr
    # Add interpretation of p-values
  footnote(i = 1, j = ~p_value_moran,
          value = as_paragraph(
              c("*** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.10. P-values calculated from permutations of bipartite network and resulting traits.")), 
          ref_symbols = c("f"), part = "header") %>%
  valign(valign = "bottom", part = "header") %>%
   set_caption(caption = "Difference in Connectivity between Community Representatives vs. Interest Groups") %>%
  autofit()
# Difference in average connectivity of members who are researchers vs. community representatives
```


```{r}
remove(out, set1, set2,get_meandegree)

```


# 6. Extended Degree

```{r, message = FALSE, warning = FALSE}

# We might also want to know how many people each kind of node is connected to,
# directly and indirectly
get_many_neighbors = function(data){
  print(paste(data$replicate[1]))
  
  # Using original/permuted member-committee edglist,
  # Get an incidence matrix
  temp <- expand_grid(
    member_id = read_csv("data/members.csv")$member_id,
    committee_id = read_csv("data/committees.csv")$committee_id) %>%
    # Join in edges and weights
    left_join(by = c("member_id", "committee_id"),
              # Join in temporary edgelist,
              # from permutation
              y = data %>%
                filter(weight > 0) %>%
                select(member_id, committee_id, weight)) %>%
    mutate(weight = if_else(is.na(weight), 
                            true = 0, 
                            false = as.numeric(weight))) %>%
    # pivot into a wide incidence matrix
    pivot_wider(
      id_cols = member_id,
      names_from = committee_id,
      values_from = weight) %>%
    tibble::column_to_rownames(var = "member_id") %>%
    as.matrix() 
  
  # Generate member coaffiliation network edgelist
  temp <- temp %*% t(temp) %>%
    # remove the diagonal
    diag.remove(remove.val = 0) %>%
    as.data.frame() %>%
    tibble::rownames_to_column(var = "from") %>%
    # Turn into edgelist
    pivot_longer(
      cols = -from,
      names_to = "to",
      values_to = "weight") %>%
    filter(weight > 0) %>%
    tbl_graph(nodes = read_csv("data/members.csv"), directed = FALSE) %>%
    as.igraph()
  
  # Use the following short function to calculate number of neighbors
  # per vertex
  get_neighbors = function(number){
    data.frame(
      member_id = igraph::V(temp)$member_id,
      measure = as.character(number),
      stat = temp %>%
        igraph::neighborhood.size(
          order = number, mode = "all", mindist = 0)) %>%
      return()
  }
  
  # Get first through third degree number of neighbors
  c(1:3) %>%
    lapply(get_neighbors) %>%
    bind_rows() %>%
    # Bind in Betweenness and Strength
    bind_rows(
      # Get Betweenness
      data.frame(
        member_id = igraph::V(temp)$member_id,
        measure = "between",
        stat = temp %>%
          igraph::betweenness(weights = igraph::E(temp)$weight, 
                              directed = FALSE)),
      # Get Strength
      data.frame(
        member_id = igraph::V(temp)$member_id,
        measure = "strength",
        stat = temp %>%
          igraph::strength(loops = FALSE, mode = "total", 
                           weights = igraph::E(temp)$weight))
    ) %>%
    # Pivot wider, so you get fewer rows in the end
    pivot_wider(
      id_cols = member_id,
      names_from = measure,
      values_from = stat) %>%
  return()
}


read_rds("perm/block_perm.rds") %>%
  split(.$replicate) %>%
  map_dfr(. %>% get_many_neighbors, .id = "replicate") %>%
  saveRDS("perm/null_neighbors.rds")
```


```{r}
# Get first, second, and third degree neighbors
out <- read_rds("data/matrix.rds") %>%
  get_many_neighbors() %>%
  pivot_longer(
    cols = -c(member_id),
    names_to = "order",
    values_to = "stat") %>%
  left_join(by = c("member_id", "order"),
            y = read_rds("perm/null_neighbors.rds") %>%
              pivot_longer(
                cols = -c(replicate, member_id),
                names_to = "order",
                values_to = "stat") %>%
              rename(permuted = stat)) %>%
  # Get p-values from permuted stats
  # remember to mean center
  group_by(member_id, order) %>%
  mutate(permuted = permuted - mean(permuted),
         stat_zero = stat - mean(permuted)) %>%
  summarize(stat = mean(stat, na.rm = TRUE),
            p_value = sum(abs(stat_zero) <= abs(permuted)) / n())


out %>%
  filter(order %in% c(1,2,3)) %>%
  mutate(order = order %>% factor() %>% recode_factor(
    "between" = "Betweenness",
    "strength" = "Co-Membership Ties",
    "3" = "Third Degree Ties",
    "2" = "Second Degree Ties",
    "1" = "First Degree Ties"),
    member_id = member_id %>% factor() %>% recode_factor(
      "name_1" = "Dr. Eichi Engineer (#1)", 
      "name_59" = "Mr. Takeshi Train (#59)", 
      "name_68" = "Mr. Hiroki Highway (#68)")) %>%
  ggplot(mapping = aes(x = order, y= stat, 
                       group = order, fill = order)) +
  # Display the observed points
  geom_jitter(color = "grey", alpha = 0.25) +
  # Show degree/neighbor distribution
  geom_violin(scale = "width", alpha = 0.5, fill = "grey") +
  # Show actual points
  geom_jitter(data = . %>%
               filter(member_id %in% c("Dr. Eichi Engineer (#1)", 
                                       "Mr. Takeshi Train (#59)", 
                                       "Mr. Hiroki Highway (#68)")),
             mapping = aes(x = order, y = stat,
                           group = member_id, color = member_id),
             size = 3,
             height = 0.1, width = 0.1) +
  coord_flip() +
  theme_classic() +
  theme(panel.border = element_rect(fill= NA, color = "black"),
        plot.caption = element_text(hjust = 0.5)) +
  scale_color_viridis(option  = "plasma", discrete = TRUE,
                     begin = .10, end = 0.8) +
  labs(
    color = "Top 3 Members",
    x = "", y = "# of Neighbors linked by Committee Ties",
       caption = "Permutations of the bipartite network show that\nthese degree estimates are each statistically significant at p < 0.001.") +
  guides(fill = FALSE) 
  
```



```{r}

out %>% 
  filter(member_id %in% c("name_1", "name_59", "name_68")) %>%
  pivot_wider(
    id_cols = member_id,
    names_from = order,
    values_from = c(stat, p_value)) %>%
  mutate(stat_1 = paste(stat_1,
                               gtools::stars.pval(p_value_1),
                               sep = ""),
         stat_2 = paste(stat_2,
                               gtools::stars.pval(p_value_2),
                               sep = ""),
         stat_3 = paste(stat_3,
                               gtools::stars.pval(p_value_3),
                               sep = ""),
         stat_strength = paste(stat_strength,
                               gtools::stars.pval(p_value_strength),
                               sep = ""),
         stat_between = paste(round(stat_between,0),
                              gtools::stars.pval(p_value_between),
                              sep = "")) %>%
  mutate(number = member_id %>% dplyr::recode(
    "name_1" =  "1",
    "name_59" = "59",
    "name_68" = "68") %>% as.numeric()) %>% 
  mutate(member_id = member_id %>% factor() %>% recode_factor(
      "name_1" = "Dr. Eichi Engineer", 
      "name_59" = "Mr. Takeshi Train", 
      "name_68" = "Mr. Hiroki Highway")) %>%
  select(member_id,number,
         stat_1, stat_2, stat_3, stat_strength, stat_between) %>%
flextable(
    col_keys = c("member_id", "number", "stat_1", "stat_2",
                 "stat_3", "stat_strength", "stat_between")) %>%
  # Set column labels
  set_header_labels(
    "member_id" = "Member",
    "number" = "ID No.",
    "stat_1" = "First Degree Ties",
    "stat_2" = "Second Degree Ties",
    "stat_3" = "Third Degree Ties",
    "stat_strength" = "Co-Member Ties",
    "stat_between" = "Bridging Capacity") %>%
    
# Add interpretation of moran's I
   footnote(i = 1, j = ~stat_between,
          value = as_paragraph(
              c(" *** p < 0.001, ** p < 0.01, * p < 0.05. P-values calculated by permuting bipartite network.")), 
          ref_symbols = c("a"), part = "header") 
```


# 7. Network Visuals


## 7.1 Committee Network

```{r}


read_rds("data/edgelist_committees.rds") %>%
  filter(weight > 0) %>%
  tbl_graph(directed = FALSE,
            nodes = read_csv("data/committees.csv")) %>%
  filter(!node_is_isolated()) %>%
  activate(nodes) %>%
  mutate(degree = centrality_degree(
    weights = weight, mode = "total", loops = FALSE, normalized = FALSE),
    geography = paste(str_sub(geography, 0,1) %>% toupper,
                      str_sub(geography, 2, -1), sep = "")) %>%
  activate(edges) %>%
  rename(`# Members shared in Common` = weight) %>%
  ggraph(layout = "fr") +
  geom_edge_link(mapping = aes(width = `# Members shared in Common`), color = "grey", alpha = 0.5) +
  geom_node_point(mapping = aes(color = geography, size = degree), alpha = 0.75) +
  theme_void(base_size = 12) +
  theme(legend.position = "bottom", legend.box = "vertical") +
  scale_color_viridis(option = "plasma", begin = 0.1, end = 0.8, discrete = TRUE) +
  labs(
    color = "Geography",
    size = "# Co-membership ties") +
  ggsave(filename = "plot2.png")

```

## 7.2 Member Network

```{r}
library(tidyverse)
library(tidygraph)
library(ggraph)
library(viridis)

colors <- viridis(4, option = "plasma", begin = 0.1, end = 0.8)


set.seed(12345)

g <- read_rds("data/edgelist_members.rds") %>%
    filter(weight > 0) %>%
    # Convert to tidygraph
    tbl_graph(directed = FALSE,
              nodes = read_csv("data/members.csv") %>%
                # Create a vector indicating 
                mutate(type = if_else(
                  research == "yes", 
                  true = "Engineer / Researcher", 
                  false = "Other", missing = "Other")) %>%
                select(member_id, type)) %>%
    activate(nodes) %>%
    # Because degree is edgewise - we need to divide by 2
    mutate(degree = centrality_degree(
      weights = weight, 
      mode = "total", loops = FALSE, normalized = FALSE)) %>%
    activate(edges) %>%
    mutate(version = "Experts") %>%
  # Filter to nodes greater than 30,
  # So that we exclude members just connected to their own isolated committee
  activate(nodes) %>%
  filter(degree >= 30)

set.seed(12345)

g %>%
  ggraph(layout = "fr") +
  geom_edge_diagonal(color = "grey", alpha = 0.5) +
  geom_node_point(mapping = aes(color = type, size = degree), alpha = 0.5) +
  theme_void() +
  theme(legend.position = "bottom", legend.box = "vertical") +
  scale_color_viridis(option = "plasma", begin = 0.1, end = 0.8, discrete = TRUE) +
  labs(
    color = "Member Type",
    size = "# of Co-Membership Ties") +
  ggsave(filename = "plot_researchers.png", height = 5, width = 5)

```

```{r}
# This is the government network, in case you want it.
g <- read_rds("data/edgelist_members.rds") %>%
    filter(weight > 0) %>%
    # Convert to tidygraph
    tbl_graph(directed = FALSE,
              nodes = read_csv("data/members.csv") %>%
                # Create a vector indicating 
                mutate(type = if_else(
                  govt == "yes", 
                  true = "Government Bureaucrat", 
                  false = "Other", missing = "Other")) %>%
                select(member_id, type)) %>%
    activate(nodes) %>%
    # Because degree is edgewise - we need to divide by 2
    mutate(degree = centrality_degree(
      weights = weight, 
      mode = "total", loops = FALSE, normalized = FALSE)) %>%
    activate(edges) %>%
    mutate(version = "Experts") %>%
  # Filter to nodes greater than 30,
  # So that we exclude members just connected to their own isolated committee
  activate(nodes) %>%
  filter(degree >= 30)

set.seed(12345)

g %>%
  ggraph(layout = "fr") +
  geom_edge_diagonal(color = "grey", alpha = 0.5) +
  geom_node_point(mapping = aes(color = type, size = degree), alpha = 0.5) +
  theme_void() +
  theme(legend.position = "bottom", legend.box = "vertical") +
  scale_color_viridis(option = "plasma", begin = 0.1, end = 0.8, discrete = TRUE) +
  labs(
    color = "Member Type",
    size = "# of Co-Membership Ties") +
  ggsave(filename = "plot_govt.png", height = 5, width = 5)


```
```{r}
remove(g, colors)
```
